Data Visualization

In [7]:
import os
import cv2
import glob
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns 

%matplotlib inline  

加载数据集

In [18]:
# Root directory of the dataset. NOTE: the name `dir` shadows the Python
# builtin, but later cells read it, so it is kept unchanged here.
dir = "/ext/Data/distracted_driver_detection/"

# Image index; this notebook uses its 'subject', 'classname' and 'img' columns.
driver_imgs_list_csv = os.path.join(dir, "driver_imgs_list.csv")
df = pd.read_csv(driver_imgs_list_csv)

# Number of images per driver (Series indexed by subject id).
driver_list = df.groupby('subject')['img'].count()
print(driver_list)
print("drivers count = %d"%len(driver_list))

# Horizontal bar chart of the image count per driver.
sns.countplot(y='subject', data=df, orient="h")
# seaborn no longer exposes `plt` in its namespace; call matplotlib's pyplot directly.
plt.show()
subject
p002     725
p012     823
p014     876
p015     875
p016    1078
p021    1237
p022    1233
p024    1226
p026    1196
p035     848
p039     651
p041     605
p042     591
p045     724
p047     835
p049    1011
p050     790
p051     920
p052     740
p056     794
p061     809
p064     820
p066    1034
p072     346
p075     814
p081     823
Name: img, dtype: int64
drivers count = 26
In [22]:
# Number of images per class (Series indexed by class name).
class_list = df.groupby('classname')['img'].count()
print(class_list)
print("classes count = %d"%len(class_list))

# Bar chart of the image count per class.
sns.countplot(x='classname', data=df)
# seaborn no longer exposes `plt` in its namespace; call matplotlib's pyplot directly.
plt.show()
classname
c0    2489
c1    2267
c2    2317
c3    2346
c4    2326
c5    2312
c6    2325
c7    2002
c8    1911
c9    2129
Name: img, dtype: int64
classes count = 10
In [25]:
# Image count per driver, split by class. The figure is tall because each
# driver row carries one bar per class.
# seaborn no longer exposes `plt` in its namespace; use matplotlib's pyplot directly.
plt.figure(figsize=(16, 32))
sns.countplot(y='subject',  hue='classname', data=df)
plt.show()
In [4]:
def show_images(classname):
    """Display one sample image per driver for the given class.

    For every driver in ``driver_list`` the first row of ``df`` matching both
    the driver and ``classname`` is selected. Its image is read from
    ``<dir>/train/<classname>/<img>`` and drawn in a grid of subplots, each
    titled with the driver id. Drivers without any image of the class are
    skipped.

    Args:
        classname: Value of the ``classname`` column to display (e.g. ``"c0"``).

    Raises:
        IOError: If a selected image cannot be read from disk.
    """
    class_rows = df[df["classname"] == classname]

    drivers = []
    images = []
    for driver in driver_list.index:
        item0 = class_rows[class_rows["subject"] == driver].head(1)
        if item0.empty:
            # No image of this class for this driver; indexing `.values[0]`
            # on the empty selection would raise IndexError.
            continue
        drivers.append(driver)
        images.append(
            os.path.join(dir, "train", item0["classname"].values[0], item0["img"].values[0])
        )

    # Keep the 6x5 layout for up to 30 images and add rows beyond that, since
    # plt.subplot rejects an index larger than rows * cols.
    ncols = 5
    nrows = max(6, -(-len(images) // ncols))

    plt.figure(figsize=(16, 16))
    for i, (driver, path) in enumerate(zip(drivers, images)):
        plt.subplot(nrows, ncols, i + 1)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise IOError("could not read image: %s" % path)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.title(driver)
        plt.axis('off')
        plt.imshow(img)
In [5]:
# One sample image per driver for class "c0".
show_images("c0")
In [6]:
# One sample image per driver for class "c1".
show_images("c1")
In [7]:
# One sample image per driver for class "c2".
show_images("c2")
In [8]:
# One sample image per driver for class "c3".
show_images("c3")
In [9]:
# One sample image per driver for class "c4".
show_images("c4")
In [10]:
# One sample image per driver for class "c5".
show_images("c5")
In [11]:
# One sample image per driver for class "c6".
show_images("c6")
In [12]:
# One sample image per driver for class "c7".
show_images("c7")
In [13]:
# One sample image per driver for class "c8".
show_images("c8")
In [14]:
# One sample image per driver for class "c9".
show_images("c9")

Consecutive images of the same driver

In [15]:
# Show up to 20 consecutive index rows of a single driver (p002), starting at
# row offset `begin` within that driver's rows.
begin = 550

# Filter the driver's rows once and slice the window in one step, instead of
# re-filtering the whole frame on every loop iteration. A window that runs past
# the driver's last row is truncated rather than raising IndexError.
driver_rows = df[df["subject"] == "p002"].iloc[begin:begin + 20]
images = [
    os.path.join(dir, "train", row_class, row_img)
    for row_class, row_img in zip(driver_rows["classname"].values, driver_rows["img"].values)
]
print(len(images))

plt.figure(figsize=(16, 10))
for i, path in enumerate(images):
    plt.subplot(4, 5, i + 1)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("could not read image: %s" % path)
    # OpenCV loads images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.axis('off')
    plt.imshow(img)
20

Sample images from the test data

In [18]:
# Show up to 30 test images, starting at offset `begin` in the file listing.
begin = 2000
# glob.glob returns paths in arbitrary (filesystem-dependent) order; sort them
# so the same window of files is displayed on every run.
images = sorted(glob.glob(os.path.join(dir, "test/test/", "*")))[begin:begin+30]

plt.figure(figsize=(16, 16))
for i, path in enumerate(images):
    plt.subplot(6, 5, i + 1)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("could not read image: %s" % path)
    # OpenCV loads images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.axis('off')
    plt.imshow(img)
In [ ]: